#This to obtain:
#table_A10.tex


library("broom")
library("dplyr")
library("grid")
library("tidyr")
library("sandwich")
library("lfe")
library("broom")
library("dplyr")
library("tidyr")
library("grid")
library("stargazer")
library("lmtest")
library("multiwayvcov")
library("ggplot2")
library("glue")
library("spdep")
library("spatialreg")
library("readxl")


#!!Run Step0 first


setwd("/Users/bgpopescu/")
#setwd("C:/Users/bogdanp/")


#Reading Polygons
x<-st_layers(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb")


HRV_adm0 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm0_wgs")

HRV_adm3 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm3")
HRV_adm3<-st_simplify(HRV_adm3, dTolerance = 0.005)
HRV_adm3<-subset(HRV_adm3, select = c(ID_2))
HRV_adm3_data = read_excel("./Dropbox/Legacies_Project/Analysis/data/merge.xlsx", sheet=1, col_names = TRUE, skip = 0)


final_sp<-left_join(HRV_adm3, HRV_adm3_data, by = c("ID_2"="ID_2"))
final_sp<-subset(final_sp, bosnia_border_mun==0)


#################################
#Function for spatial regression#
#################################
spatial_regression <- function(data_sample, y, x, unique_observation_id) {
  clean_x = clean_list(x, "quad")
  clean_x = clean_list(clean_x, "bfe")
  
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(clean_x, collapse = "+")))
  bw<-subset(data_sample, select =c(x, y, unique_observation_id))
  #Removing NAs
  bw<-bw[!sf::st_is_empty(bw), ]
  bw<-na.omit(bw)
  #Step1: the function "poly2nb" builds a neighbours list based on
  list_nb=poly2nb(bw, row.names = dplyr::pull(bw, unique_observation_id))
  #Step2: Convert nb to listw type
  list_nb_w=tryCatch(nb2listw(list_nb, zero.policy=TRUE), error=function(err) NA)
  #Step3: Spatial Filtering
  reg1_res_sp<-tryCatch(SpatialFiltering(specification,
                                         data = bw,
                                         nb = list_nb,
                                         style="W",
                                         ExactEV = F,
                                         zero.policy = T,
                                         na.action=na.exclude),
                        error=function(err) NA)
  
  print("Done Spatial Filtering")
  #Step4: Include fitted results"
  bw_fitted<-tryCatch(cbind(bw, as.data.frame(fitted(reg1_res_sp)),
                            id = row.names(bw)), error=function(err) 
                            {print("NO NEIGHBORS"); data_sample})
  #Keeping track of indices
  #This will allow us to see exactly which observations were used in the regressions
  row.names(bw_fitted) <- bw_fitted$id
  bw_fitted<-subset(bw_fitted, select = -c(id))
  list_fitted_ei<-names(bw_fitted[, grepl("vec", names(bw_fitted))])
  list_fitted_ei<-list_fitted_ei[list_fitted_ei != "Shape"]
  all_vars<-append(x, list_fitted_ei)
  specification_with_eigenv = as.formula(paste(glue("{y}~"), 
                                               paste(all_vars, collapse = "+")))
  
  print(specification_with_eigenv)
  model1 <- tryCatch(lm(specification_with_eigenv, data = bw_fitted, na.action = na.exclude), error=function(err) NA)
  
  reg_moran<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$statistic
  reg_moran_p<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$p.value
  reg_moran_star<-paste(round(reg_moran, digits = 3), "",
                        ifelse(reg_moran_p<.01,"***",
                               ifelse(reg_moran_p<.05,"**",
                                      ifelse(reg_moran_p<.1,"*",
                                             ""))), "", "", sep = "")
  
  
  return(list(model1, reg_moran_star))
}


######################################
#Function for cleaning the list of FE#
#####################################

clean_list <- function(old_list, particle) {
  term <- paste(glue("^{particle}"))
  new_list = list() 
  for (i in old_list) {
    #print(i)
    temp_list = c()
    
    for (j in i){
      if (!grepl(term, j)){
        temp_list <- c(temp_list, j)
      }
    }
    l = length(new_list)
    new_list[[l+1]] <- temp_list
  }
  return(new_list)}

#clean_list(specifications, "SN_")


###############################
#Function for rerunning models#
###############################
rerun_regression<-function(regression_name, string_dv, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes already existing regression objects, associates them with an 
  #"official" label, calculates robust clustered SE and CI based on a cluster
  #that is in the original dataset
  #and puts the results in a tibble
  #-this will be used for ggplot later
  
  #Input:
  #-Regression model object after lm
  #-The label for the DV that you want displayed in ggplot
  #-original dataset that contains thhe names of the clusters
  
  #Output:
  #-A tibble with regression coefficients from the model that you ran with:
  #--robust SE
  #--robust CI
  
  #Recreate dataframe from the regression object
  remake_data<-regression_name$model
  #Scale the DV
  remake_data[1]<-scale(remake_data[1])
  #Take the string of the DV and IV
  dv<-regression_name$terms[[2]]
  ivs<-regression_name$terms[[3]]
  ivs_str<-paste(ivs)[2]
  dv_str<-paste(dv, "~", collapse = "")
  dv_ivs_str<-paste(dv_str, ivs_str, sep=" ")
  #Create specification and run model
  specification = as.formula(dv_ivs_str)
  model <- lm(specification, data = remake_data)
  no_obs<-nobs(model)
  no_obs<-paste("Obs: ", as.character(no_obs), sep = "")
  #Calculate robust SE
  model_se<-se_robust_cluster(model, orig_data_for_clusters, cluster_name)
  #Create tibble
  model_tidy<-tidy(model)
  model_tidy$no_obs<-no_obs
  model_se_tidy<-tidy(model_se)
  names(model_se_tidy)[1] <- "term"
  names(model_se_tidy)[2] <- "robust.se"
  model_tidy2<-left_join(model_tidy, model_se_tidy, by = c("term"="term"))
  
  #Calculate CI
  df_ci<-as.data.frame(ci_robust_cluster(model, orig_data_for_clusters, cluster_name))
  df_ci <- cbind(term = rownames(df_ci), df_ci)
  names(df_ci)[2] <- "lb_95"
  names(df_ci)[3] <- "ub_95"
  
  #Merge CI to the dataframe
  model_tidy3<-left_join(model_tidy2, df_ci, by = c("term"="term"))
  #Only keep the treat variable
  model_tidy4<-subset(model_tidy3, term=="treat")
  model_tidy4$dv_official<-string_dv
  return(model_tidy4)
}

se_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  return(res)}

ci_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res_ci)}

#Analyzing data
data<-final_sp

specifications <- list(c('treat', 
                         'POINT_X', 'POINT_Y', 
                         'zagreb_distance', 
                         'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7',
                         'quad1', 'quad2', 'quad3'))

railroads_1869_density_list<-spatial_regression(data, "railroads_1869_density", specifications[[1]], "NAME_2")
railroads_1869_density<-railroads_1869_density_list[[1]]
railroads_1869_density_m<-railroads_1869_density_list[[2]]

planned_railroads_1869_density_list<-spatial_regression(data, "planned_railroads_1869_density", specifications[[1]],"NAME_2")
planned_railroads_1869_density<-planned_railroads_1869_density_list[[1]]
planned_railroads_1869_density_m<-planned_railroads_1869_density_list[[2]]

railroads_1884_density_list<-spatial_regression(data, "railroads_1884_density", specifications[[1]], "NAME_2")
railroads_1884_density<-railroads_1884_density_list[[1]]
railroads_1884_density_m<-railroads_1884_density_list[[2]]

planned_railroads_1884_density_list<-spatial_regression(data, "planned_railroads_1884_density", specifications[[1]], "NAME_2")
planned_railroads_1884_density<-planned_railroads_1884_density_list[[1]]
planned_railroads_1884_density_m<-planned_railroads_1884_density_list[[2]]

thoroughfare_1940_density_list<-spatial_regression(data, "thoroughfare_1940_density", specifications[[1]], "NAME_2")
thoroughfare_1940_density<-thoroughfare_1940_density_list[[1]]
thoroughfare_1940_density_m<-thoroughfare_1940_density_list[[2]]

roads_1957_concrete_density_list<-spatial_regression(data, "roads_1957_concrete_density", specifications[[1]], "NAME_2")
roads_1957_concrete_density<-roads_1957_concrete_density_list[[1]]
roads_1957_concrete_density_m<-roads_1957_concrete_density_list[[2]]

roads_1957_good_density_list<-spatial_regression(data, "roads_1957_good_density", specifications[[1]], "NAME_2")
roads_1957_good_density<-roads_1957_good_density_list[[1]]
roads_1957_good_density_m<-roads_1957_good_density_list[[2]]

road_residential_density_list<-spatial_regression(data, "road_residential_density", specifications[[1]], "NAME_2")
road_residential_density<-road_residential_density_list[[1]]
road_residential_density_m<-road_residential_density_list[[2]]

road_track_density_list<-spatial_regression(data, "road_track_density", specifications[[1]], "NAME_2")
road_track_density<-road_track_density_list[[1]]
road_track_density_m<-road_track_density_list[[2]]

mean_railroads_1869_density<-round(mean(data$railroads_1869_density, na.rm=T), digits = 3)
mean_planned_railroads_1869_density<-round(mean(data$planned_railroads_1869_density, na.rm=T), digits = 3)
mean_railroads_1879_density<-round(mean(data$railroads_1879_density, na.rm=T), digits = 3)
mean_planned_railroads_1879_density<-round(mean(data$planned_railroads_1879_density, na.rm=T), digits = 3)
mean_part_approv_railroads_1879_density<-round(mean(data$part_approv_railroads_1879_density, na.rm=T), digits = 3)
mean_railroads_1884_density<-round(mean(data$railroads_1884_density, na.rm=T), digits = 3)
mean_planned_railroads_1884_density<-round(mean(data$planned_railroads_1884_density, na.rm=T), digits = 3)
mean_part_approv_railroads_1884_density<-round(mean(data$part_approv_railroads_1884_density, na.rm=T), digits = 3)
mean_thoroughfare_1940_density<-round(mean(data$thoroughfare_1940_density, na.rm=T), digits = 3)
mean_roads_1957_concrete_density<-round(mean(data$roads_1957_concrete_density, na.rm=T), digits = 3)
mean_roads_1957_good_density<-round(mean(data$roads_1957_good_density, na.rm=T), digits = 3)
mean_road_residential_density<-round(mean(data$road_residential_density, na.rm=T), digits = 3)
mean_road_track_density<-round(mean(data$road_track_density, na.rm=T), digits = 3)


sd_railroads_1869_density<-round(sd(data$railroads_1869_density, na.rm=T), digits = 3)
sd_planned_railroads_1869_density<-round(sd(data$planned_railroads_1869_density, na.rm=T), digits = 3)
sd_railroads_1879_density<-round(sd(data$railroads_1879_density, na.rm=T), digits = 3)
sd_planned_railroads_1879_density<-round(sd(data$planned_railroads_1879_density, na.rm=T), digits = 3)
sd_part_approv_railroads_1879_density<-round(sd(data$part_approv_railroads_1879_density, na.rm=T), digits = 3)
sd_railroads_1884_density<-round(sd(data$railroads_1884_density, na.rm=T), digits = 3)
sd_planned_railroads_1884_density<-round(sd(data$planned_railroads_1884_density, na.rm=T), digits = 3)
sd_part_approv_railroads_1884_density<-round(sd(data$part_approv_railroads_1884_density, na.rm=T), digits = 3)
sd_thoroughfare_1940_density<-round(sd(data$thoroughfare_1940_density, na.rm=T), digits = 3)
sd_roads_1957_concrete_density<-round(sd(data$roads_1957_concrete_density, na.rm=T), digits = 3)
sd_roads_1957_good_density<-round(sd(data$roads_1957_good_density, na.rm=T), digits = 3)
sd_road_residential_density<-round(sd(data$road_residential_density, na.rm=T), digits = 3)
sd_road_track_density<-round(sd(data$road_track_density, na.rm=T), digits = 3)



column_labels<-c("\\shortstack{Railroad\\\\ Density\\\\ 1869}",
             "\\shortstack{Planned Railroad\\\\ Density\\\\ 1869}",
             "\\shortstack{Planned Railroad\\\\ Density\\\\ 1884}",
             "\\shortstack{Thoroughfare\\\\ Density\\\\ 1940}",
             "\\shortstack{Asphalt Road\\\\ Density\\\\ 1957}",
             "\\shortstack{Good Road\\\\ Density\\\\ 1957}",
             "\\shortstack{Residential Road\\\\ Density\\\\ 2017}",
             "\\shortstack{Road Track\\\\ Density\\\\ 2017}")



list_models<-list(railroads_1869_density, 
                  planned_railroads_1869_density,
                  planned_railroads_1884_density,
                  thoroughfare_1940_density,
                  roads_1957_concrete_density, 
                  roads_1957_good_density,
                  road_residential_density, 
                  road_track_density)



roads_sign<-stargazer(list_models,
                     digits = 3,
                     column.labels= column_labels,
                     keep = c("treat"),
                     se = list(se_robust_cluster(railroads_1869_density, data, "NAME_2"),
                               se_robust_cluster(planned_railroads_1869_density, data, "NAME_2"),
                               se_robust_cluster(planned_railroads_1884_density, data, "NAME_2"),
                               se_robust_cluster(thoroughfare_1940_density, data, "NAME_2"),
                               se_robust_cluster(roads_1957_concrete_density, data, "NAME_2"),
                               se_robust_cluster(roads_1957_good_density, data, "NAME_2"),
                               se_robust_cluster(road_residential_density, data, "NAME_2"),
                               se_robust_cluster(road_track_density, data, "NAME_2")),
                     covariate.labels=c("Military Territory"),
                     dep.var.caption = "Dependent variable",
                     dep.var.labels.include = F,
                     omit.stat=c("LL","ser","f", "rsq"), no.space=TRUE, float=FALSE,
                     add.lines=list(c("Mean", mean_railroads_1869_density, 
                                      mean_planned_railroads_1869_density,
                                      mean_planned_railroads_1884_density,
                                      mean_thoroughfare_1940_density,
                                      mean_roads_1957_concrete_density, 
                                      mean_roads_1957_good_density,
                                      mean_road_residential_density, 
                                      mean_road_track_density),
                                    c("SD", sd_railroads_1869_density, 
                                      sd_planned_railroads_1869_density,
                                      sd_planned_railroads_1884_density,
                                      sd_thoroughfare_1940_density,
                                      sd_roads_1957_concrete_density, 
                                      sd_roads_1957_good_density,
                                      sd_road_residential_density, 
                                      sd_road_track_density),
                                    c("Boundary FE", rep("Yes", 10)),
                                    c("Region FE", rep("Yes", 10)),
                                    c("Moran eigenvectors", rep("Yes", 10)),
                                    c("Moran's I Residual", 
                                      railroads_1869_density_m, 
                                      planned_railroads_1869_density_m,
                                      planned_railroads_1884_density_m,
                                      thoroughfare_1940_density_m,
                                      roads_1957_concrete_density_m, 
                                      roads_1957_good_density_m,
                                      road_residential_density_m, 
                                      road_track_density_m)))

relevant_text<-"The unit of analysis is the 2011 municipality. 
Data is based on \\citetarch{EigenthumVerlag1869, EigenthumVerlag1884}, 
\\citetpri{Jugoslawien1940, BohinecandPlanina1957} and Open Street Maps. 
Every regression includes the following covariates:
a linear polynomial in latitude and longitude, distance to Zagreb,
and boundary segment FE."

note_latex <- paste("\\multicolumn{9}{l} {\\parbox[t]{30cm}{ \\footnotesize \\textit{Notes:} 
Coefficients and robust standard errors in parantheses from OLS regression. 
$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.", relevant_text, "}}", sep=" ")
roads_sign[grepl("Note", roads_sign)] <- note_latex
cat(roads_sign, file="./Dropbox/Legacies_Project/Paper/tables/roads/table_A10.tex", sep="\n")
